library(tidyverse)
library(sf)

######################################################
###################### FIG 1 #########################
######################################################

fig1a<-ggplot(a, aes(x = factor(li_community), y = proportion, fill = financing)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) + 
  geom_text(aes(label = round(proportion, 1)),  
            position = position_dodge(width = 0.9), 
            vjust = -0.5,  
            size = 7 / .pt) +    
  labs(title = "A. Percentage of projects by \n low-income status and financing model",
       x = "Low-income neighborhood",
       y = "Percentage",
       fill = "Financing model") +
  scale_fill_manual(values = hcl.colors(n = length(unique(a$financing)), palette = "RedOr")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 7),  
    axis.text.y = element_text(size = 7),
    axis.title.x = element_text(size = 7),
    axis.title.y = element_text(size = 7),
    plot.title = element_text(size = 7),       
    legend.title = element_text(size = 7),     
    legend.text = element_text(size = 7)    
  ) +
  guides(fill = guide_legend(title = "Financing model"))


fig1b<-ggplot(b, aes(x = factor(majority_status), y = proportion, fill = financing)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) + 
  geom_text(aes(label = round(proportion, 1)),  
            position = position_dodge(width = 0.9),  
            vjust = -0.5,  
            size = 7 / .pt) +    
  labs(title = "B. Percentage of projects by \n majority status and financing model",
       x = "Majority Status",
       y = "Percentage",
       fill = "Financing model") +
  scale_x_discrete(labels = c("White", "Asian", "Black", "Hispanic", "No Majority")) + 
  scale_fill_manual(values = hcl.colors(n = length(unique(b$financing)), palette = "RedOr")) + 
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 7),    
    axis.text.y = element_text(size = 7),
    axis.title.x = element_text(size = 7),   
    axis.title.y = element_text(size = 7),
    plot.title = element_text(size = 7),      
    legend.title = element_text(size = 7),    
    legend.text = element_text(size = 7)     
  ) +
  guides(fill = guide_legend(title = "Financing model"))


fig1 <- fig1a+ fig1b


######################################################
###################### FIG 2 #########################
######################################################
y_breaks <- c(1.8, 1)
financing_levels <- levels(as.factor(c$financing))

fig2a<-ggplot(c, aes(x = predicted_values, y = y_offset, color = factor(li_community))) +
  geom_point(size = 1.5) +  
  geom_errorbar(aes(xmin = ci_lower, xmax = ci_upper), width = 0.04, size = 0.5) +  
  theme_minimal() +
  labs(x = "Estimated monthly generation (kWh)", y = "Financing model", color = "Low-income \n neighborhood") +
  scale_color_manual(values = hcl.colors(n = length(unique(c$li_community)), palette = "Zissou 1")) +  
  scale_y_continuous(
    breaks = y_breaks,
    labels = rev(financing_levels)
  ) +
  theme(
    axis.text = element_text(size = 7),  
    axis.title = element_text(size = 7), 
    legend.text = element_text(size = 7), 
    legend.title = element_text(size = 7)
  )




y_breaks <- c(1.8, 1)
financing_levels <- levels(as.factor(d$financing))
majority_status_labels <- c("No majority", "Hispanic", "Black", "White")  

fig2b<- ggplot(d, aes(
  x = predicted_values,
  y = y_offset,
  color = factor(majority_status, levels = rev(levels(d$majority_status)))
)) +
  geom_point(size = 1.5) + 
  geom_errorbar(aes(xmin = ci_lower, xmax = ci_upper), width = 0.04, size = 0.5) +  
  theme_minimal() +
  labs(x = "Estimated monthly generation (kWh)", y = "Financing model", color = "Majority status") +
  scale_y_continuous(
    breaks = y_breaks,
    labels = rev(financing_levels)
  ) +
  scale_color_manual(
    values = hcl.colors(n = length(unique(b$majority_status)), palette = "Zissou 1"),
    labels = majority_status_labels
  ) +
  theme(
    axis.text = element_text(size = 7),  
    axis.title = element_text(size = 7), 
    legend.text = element_text(size = 7),
    legend.title = element_text(size = 7) 
  )


fig2 <- fig2a+ fig2b

######################################################
###################### FIG 3 #########################
######################################################

fig3a<-ggplot(e, aes(x = installer, y = percent, fill = li_community)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Proportion of Installations (%)", fill = "Low-income \n neighborhood") +
  scale_fill_manual(
    values = hcl.colors(n = length(unique(e$li_community)), palette = "Red-Blue"),
    labels = c("No", "Yes") 
  ) +
  geom_hline(yintercept = 24.25, linetype = "dashed", color = "black", size = 1) +  
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 7),       
    axis.text.y = element_blank(),              
    axis.title = element_text(size = 7),        
    plot.title = element_text(size = 7),        
    legend.title = element_text(size = 7),      
    legend.text = element_text(size = 7),       
    legend.key.size = unit(4, "mm"),           
    legend.position = "bottom"                 
  )

fig3b <- ggplot(f, aes(x = predicted_values, y = reorder(installer, predicted_values))) +
  geom_point() +
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), height = 0.2) +
  labs(title = "",
       x = "Estimated monthly generation (kWh)",
       y = "") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 7),
    axis.text.y = element_blank(),  
    axis.ticks.y = element_blank(),  
    axis.title = element_text(size = 7),
    plot.title = element_text(size = 7)
  )

fig3c<-ggplot(g, aes(x = installer, y = percent, fill = majority_category)) +
  geom_bar(stat = "identity", position = "stack") +
  coord_flip() +
  geom_hline(yintercept = 18.11, linetype = "dashed", color = "black", size = 1) +  
  geom_text(aes(y = 90, label = total_projects), hjust = 0, size = 5 / .pt) + 
  scale_fill_manual(
    values = hcl.colors(n = length(unique(g$majority_category)), palette = "Red-Blue"),
    labels = c("White", "Non-White")  
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 7),       
    axis.text.y = element_blank(),              
    axis.title = element_text(size = 7),        
    plot.title = element_text(size = 7),        
    legend.title = element_text(size = 7),      
    legend.text = element_text(size = 7),      
    legend.key.size = unit(4, "mm"),            
    legend.position = "bottom"                 
  )





######################################################
###################### FIG 4 #########################
######################################################


# Coordinates for Bridgeport
inset_xmin_1 <- -73.25
inset_xmax_1 <- -73.1
inset_ymin_1 <- 41.14
inset_ymax_1 <- 41.25

# Coordinates for Hartford
inset_xmin_2 <- -72.8
inset_xmax_2 <- -72.5
inset_ymin_2 <- 41.7
inset_ymax_2 <- 41.9

fig4a <- ggplot() +
  geom_sf(data = mapA, 
          aes(fill = normalized_installation), 
          color = "gray", lwd = 0.1) +  
  scale_fill_viridis_c(
    option = "plasma", 
    name = "Installations per \n 1,000 households",  
    trans = "log",  
    labels = scales::label_number(accuracy = 1, big.mark = ",")  
  ) +
  
  geom_rect(aes(xmin = inset_xmin_1, xmax = inset_xmax_1, ymin = inset_ymin_1, ymax = inset_ymax_1), 
            color = "yellow", fill = NA, lwd = 0.3) +  
  
  # Add the second bounding box for the Hartford inset
  geom_rect(aes(xmin = inset_xmin_2, xmax = inset_xmax_2, ymin = inset_ymin_2, ymax = inset_ymax_2), 
            color = "yellow", fill = NA, lwd = 0.3) +  
  
  labs(title = "A. Installation density by Census tract \n for the five installers associated with \n the highest generation") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 7),         
    axis.title = element_text(size = 7),        
    plot.title = element_text(size = 7),       
    legend.title = element_text(size = 5),     
    legend.text = element_text(size = 5),       
    legend.position = "right",                 
    legend.key.size = unit(4, "mm"),            
    axis.ticks = element_line(size = 0.3), 
    axis.text.x = element_text(size = 5),      
    axis.text.y = element_text(size = 5)       
  )



inset_plot_1 <- ggplot() +
  geom_sf(data = mapA, aes(fill = log(normalized_installation)), color = "gray", lwd = 0.1) +
  scale_fill_viridis_c(option = "plasma", name = "Installation Density") +
  coord_sf(xlim = c(inset_xmin_1, inset_xmax_1), ylim = c(inset_ymin_1, inset_ymax_1)) +  
  theme_void() +  
  theme(legend.position = "none")  

inset_plot_2 <- ggplot() +
  geom_sf(data = mapA, aes(fill = log(normalized_installation)), color = "gray", lwd = 0.1) +
  scale_fill_viridis_c(option = "plasma", name = "Installation Density") +
  coord_sf(xlim = c(inset_xmin_2, inset_xmax_2), ylim = c(inset_ymin_2, inset_ymax_2)) +  
  theme_void() +  
  theme(legend.position = "none") 

fig4a <- ggdraw() +
  draw_plot(fig4a) +  
  
  draw_plot(inset_plot_1, x = 0.4, y = 0.01, width = 0.2, height = 0.2) +
  draw_label("Bridgeport", x = 0.48, y = 0.23, size = 7,  color = "black") +  
  
  draw_plot(inset_plot_2, x = 0.8, y = 0.68, width = 0.2, height = 0.2) +
  draw_label("Hartford", x = 0.87, y = 0.9, size = 7,  color = "black") 





fig4b <- ggplot() +
  geom_sf(data = mapB, 
          aes(fill = normalized_installation), 
          color = "gray", lwd = 0.1) +  
  scale_fill_viridis_c(
    option = "plasma", 
    name = "Installations per \n 1,000 households",  
    trans = "log",  
    labels = scales::label_number(accuracy = 1, big.mark = ",")  
  ) +
  
 
  geom_rect(aes(xmin = inset_xmin_1, xmax = inset_xmax_1, ymin = inset_ymin_1, ymax = inset_ymax_1), 
            color = "yellow", fill = NA, lwd = 0.3) +  

  geom_rect(aes(xmin = inset_xmin_2, xmax = inset_xmax_2, ymin = inset_ymin_2, ymax = inset_ymax_2), 
            color = "yellow", fill = NA, lwd = 0.3) +  
  
  labs(title = "B. Installation density by Census tract \n for the five installers associated with \n the lowest generation") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 7),         
    axis.title = element_text(size = 7),       
    plot.title = element_text(size = 7),        
    legend.title = element_text(size = 5),      
    legend.text = element_text(size = 5),      
    legend.position = "right",                 
    legend.key.size = unit(4, "mm"),           
    axis.ticks = element_line(size = 0.3),
    axis.text.x = element_text(size = 5),       
    axis.text.y = element_text(size = 5)       
  )

inset_plot_1 <- ggplot() +
  geom_sf(data = mapB, aes(fill = log(normalized_installation)), color = "gray", lwd = 0.1) +
  scale_fill_viridis_c(option = "plasma", name = "Installation Density") +
  coord_sf(xlim = c(inset_xmin_1, inset_xmax_1), ylim = c(inset_ymin_1, inset_ymax_1)) +  
  theme_void() +  
  theme(legend.position = "none")  

inset_plot_2 <- ggplot() +
  geom_sf(data = mapB, aes(fill = log(normalized_installation)), color = "gray", lwd = 0.1) +
  scale_fill_viridis_c(option = "plasma", name = "Installation Density") +
  coord_sf(xlim = c(inset_xmin_2, inset_xmax_2), ylim = c(inset_ymin_2, inset_ymax_2)) +  
  theme_void() +  
  theme(legend.position = "none")  

fig4b <- ggdraw() +
  draw_plot(fig4b) + 
  
  draw_plot(inset_plot_1, x = 0.4, y = 0.01, width = 0.2, height = 0.2) +
  draw_label("Bridgeport", x = 0.48, y = 0.23, size = 7,  color = "black") +  
  

  draw_plot(inset_plot_2, x = 0.8, y = 0.68, width = 0.2, height = 0.2) +
  draw_label("Hartford", x = 0.87, y = 0.9, size = 7,  color = "black")  



fig4c <- ggplot() +
  geom_sf(data = mapC, aes(fill = li_community), color = "black", lwd = 0.05) +
  scale_fill_manual(
    values = c("Yes" = "purple", "No" = "lightgray"),
    name = "Low-income \n neighborhood"
  ) +
  geom_rect(aes(xmin = inset_xmin_1, xmax = inset_xmax_1, ymin = inset_ymin_1, ymax = inset_ymax_1), 
            color = "yellow", fill = NA, lwd = 0.3) +
  geom_rect(aes(xmin = inset_xmin_2, xmax = inset_xmax_2, ymin = inset_ymin_2, ymax = inset_ymax_2), 
            color = "yellow", fill = NA, lwd = 0.3) +
  labs(title = "C. Low-income Census tracts",
       fill = "Low-income \n neighborhood") +
  coord_sf(expand = FALSE) +  
  theme_minimal() +
  theme(
    axis.text = element_text(size = 7),
    axis.title = element_text(size = 7),
    plot.title = element_text(size = 7),
    legend.title = element_text(size = 7),
    legend.text = element_text(size = 7),
    legend.key.size = unit(3, "mm"),
    axis.ticks = element_line(size = 0.3), 
    axis.text.x = element_text(size = 5),
    axis.text.y = element_text(size = 5)
  )

inset_plot_lmi_1 <- ggplot() +
  geom_sf(data = mapC, aes(fill = li_community), color = "black", lwd = 0.1) +
  scale_fill_manual(values = c("Yes" = "purple", "No" = "lightgray"), name = "LMI Status") +
  coord_sf(xlim = c(inset_xmin_1, inset_xmax_1), ylim = c(inset_ymin_1, inset_ymax_1)) + 
  theme_void() +  
  theme(legend.position = "none")  

inset_plot_lmi_2 <- ggplot() +
  geom_sf(data = mapC, aes(fill = li_community), color = "black", lwd = 0.1) +
  scale_fill_manual(values = c("Yes" = "purple", "No" = "lightgray"), name = "LMI Status") +
  coord_sf(xlim = c(inset_xmin_2, inset_xmax_2), ylim = c(inset_ymin_2, inset_ymax_2)) +  
  theme_void() + 
  theme(legend.position = "none") 

fig4c <- ggdraw() +
  draw_plot(fig4c) + 
  
  draw_plot(inset_plot_lmi_1, x = 0.4, y = 0.01, width = 0.2, height = 0.2) +
  draw_label("Bridgeport", x = 0.48, y = 0.23, size = 7,  color = "black") + 
  
  # Add the second inset (Hartford) and label it
  draw_plot(inset_plot_lmi_2, x = 0.8, y = 0.68, width = 0.2, height = 0.2) +
  draw_label("Hartford", x = 0.87, y = 0.9, size = 7,  color = "black") 



fig4d <- ggplot() +
  geom_sf(data = mapD, aes(fill = majority_status_new), color = "black", lwd = 0.05) +  
  scale_fill_manual(
    values = c("White" = "lightgray", "Minority/ No majority" = "orange"), 
    name = "Majority Status",
    labels = c("White", "Non-white")  
  ) +
    geom_rect(aes(xmin = inset_xmin_1, xmax = inset_xmax_1, ymin = inset_ymin_1, ymax = inset_ymax_1), 
            color = "yellow", fill = NA, lwd = 0.3) +
  
  geom_rect(aes(xmin = inset_xmin_2, xmax = inset_xmax_2, ymin = inset_ymin_2, ymax = inset_ymax_2), 
            color = "yellow", fill = NA, lwd = 0.3) +
  
  labs(title = "D. Non-white-majority Census tracts",
       fill = "Majority status") +
  
  coord_sf(expand = FALSE) +
  
  theme_minimal() +
  theme(
    plot.margin = margin(0, 0, 0, 0),       
    axis.text = element_text(size = 7),     
    axis.title = element_text(size = 7),    
    plot.title = element_text(size = 7),    
    legend.title = element_text(size = 7),  
    legend.text = element_text(size = 7),   
    legend.key.size = unit(3, "mm"),        
    axis.ticks = element_line(size = 0.3), 
    axis.text.x = element_text(size = 5),  
    axis.text.y = element_text(size = 5)   
  )


inset_plot_majority_1 <- ggplot() +
  geom_sf(data = mapD, aes(fill = majority_status_new), color = "black", lwd = 0.1) +
  scale_fill_manual(values = c("White" = "lightgray", "Minority/ No majority" = "orange"), name = "Majority Status") +
  coord_sf(xlim = c(inset_xmin_1, inset_xmax_1), ylim = c(inset_ymin_1, inset_ymax_1)) +  
  theme_void() + 
  theme(legend.position = "none") 

inset_plot_majority_2 <- ggplot() +
  geom_sf(data = mapD, aes(fill = majority_status_new), color = "black", lwd = 0.1) +
  scale_fill_manual(values = c("White" = "lightgray", "Minority/ No majority" = "orange"), name = "Majority Status") +
  coord_sf(xlim = c(inset_xmin_2, inset_xmax_2), ylim = c(inset_ymin_2, inset_ymax_2)) +  
  theme_void() +  
  theme(legend.position = "none")  

fig4d <- ggdraw() +
  draw_plot(fig4d) + 
  
  draw_plot(inset_plot_majority_1, x = 0.4, y = 0.01, width = 0.2, height = 0.2) +
  draw_label("Bridgeport", x = 0.48, y = 0.23, size = 7,  color = "black") + 
  
  draw_plot(inset_plot_majority_2, x = 0.8, y = 0.68, width = 0.2, height = 0.2) +
  draw_label("Hartford", x = 0.87, y = 0.9, size = 7,  color = "black")  



#########################################################
################## Z Score calculation ##################
#########################################################
#Note: The calculation needs original data used in the research paper.
# application_date is the date recorded as application date
# sys_cost_w is the pre-incentive dollar amount spent per watt
# data is the dataframe that contains this info

# Function to calculate the z-scores for a specific project within a 6-month window
calculate_project_z_score <- function(application_date, sys_cost_w, data) {
  roll_window_size <- months(3)  
  
  start_date <- application_date - roll_window_size
  end_date <- application_date + roll_window_size
  
  projects_within_window <- data %>%
    filter(application_date >= start_date & application_date <= end_date)
  
  if (nrow(projects_within_window) > 1) {
    if (!any(is.na(projects_within_window$sys_cost_w))) {
      roll_avg <- mean(projects_within_window$sys_cost_w)
      roll_sd <- sd(projects_within_window$sys_cost_w)
      
      if (!is.na(roll_sd) && roll_sd != 0) {
        z_scores <- (sys_cost_w - roll_avg) / roll_sd
      } else {
        z_scores <- 0
      }
    } else {
      z_scores <- 0
    }
  } else {
    z_scores <- 0
  }
  
  return(z_scores)
}


#########################################################
############ Low income classification ##################
#########################################################
#Note: The code below classifies whether a neighborhood is low-income 
# as defined by the IRS
#Note: tidycensus needs API key access

library(tigris)
library(tidycensus)
options(tigris_use_cache = TRUE)

ct_tracts <- tracts(state = "CT", cb = TRUE, year = 2020)

cbsa_data <- core_based_statistical_areas(cb = F, year = 2020)

ct_tracts <- ct_tracts %>%
  st_make_valid() 

cbsa_data <- cbsa_data %>%
  st_make_valid()


tracts_with_cbsa <- st_intersection(ct_tracts, cbsa_data)


tracts_with_cbsa <- tracts_with_cbsa %>%
  st_make_valid() %>% 
  mutate(overlap_area = st_area(.)) %>% 
  group_by(GEOID) %>% 
  slice_max(overlap_area, with_ties = FALSE) %>% 
  ungroup()

tracts_with_cbsa_clean <- tracts_with_cbsa %>%
  select(GEOID, CBSAFP, NAMELSAD, overlap_area, geometry)


tracts_with_cbsa <- tracts_with_cbsa %>%
  mutate(
    Classification = case_when(
      LSAD.1 == "M1" ~ "Metropolitan",
      is.na(LSAD.1) ~ "Non-Metropolitan",
      TRUE ~ "Non-Metropolitan" 
    )
  )

ct_income <- get_acs(
  geography = "state",
  variables = "B19113_001", 
  survey = "acs5",
  year = 2020
) %>%
  filter(NAME == "Connecticut") %>% 
  pull(estimate)


tracts_with_cbsa_clean <- tracts_with_cbsa %>%
  select(GEOID, Classification, NAMELSAD.1, geometry)%>%
  distinct()

msa_income <- get_acs(
  geography = "metropolitan statistical area/micropolitan statistical area",
  variables = "B19113_001",
  survey = "acs5",          
  year = 2020              
)

tracts_with_cbsa_clean <- tracts_with_cbsa_clean %>%
  rename(NAME=NAMELSAD.1)

tracts_with_cbsa_clean <- merge(tracts_with_cbsa_clean, msa_income, by="NAME",all.x=T )

tracts_with_cbsa_clean <- tracts_with_cbsa_clean %>%
  rename(NAME=NAMELSAD.y)

tracts_with_cbsa_clean <- tracts_with_cbsa_clean %>%
  mutate(
    Median_Family_Income = ifelse(
      Classification == "Non-Metropolitan", ct_income, estimate
    )
  )
tracts_with_cbsa_clean <- tracts_with_cbsa_clean %>%
  rename(GEOID.a=GEOID)

tracts_with_cbsa_clean <- tracts_with_cbsa_clean %>%
  rename(GEOID=GEOID.x)


variables <- c(
  total_population = "B01003_001",
  total_poverty="B17020_001",
  below_poverty = "B17020_002",    
  tract_median_family_income = "B19113_001" 
)

ct_poverty_data <- get_acs(
  geography = "tract",
  variables = variables,
  state = "CT",
  survey = "acs5",
  year = 2020
) %>%
  select(GEOID, variable, estimate) %>%
  pivot_wider(names_from = variable, values_from = estimate) 

ct_poverty_data <- ct_poverty_data %>%
  mutate(
    poverty_rate = ifelse(
      total_population > 0,
      (below_poverty / total_poverty) * 100,
      NA
    )
  )


tracts_with_cbsa_clean <- tracts_with_cbsa_clean %>%
  left_join(ct_poverty_data, by  = "GEOID")

tracts_with_cbsa_clean <- tracts_with_cbsa_clean %>%
  distinct(GEOID, .keep_all = TRUE)


statewide_threshold <- ct_income * 0.8

tracts_with_cbsa_clean <- tracts_with_cbsa_clean %>%
  mutate(
    applicable_threshold = case_when(
      Classification == "Metropolitan" ~ pmax(statewide_threshold, Median_Family_Income * 0.8),
      TRUE ~ statewide_threshold
    ),
    is_low_income = ifelse(
      poverty_rate >= 20 | tract_median_family_income <= applicable_threshold,
      "Yes",
      "No"
    )
  )


#########################################################
##########Majority status classification ################
#########################################################
#Note: The percentages by race are derived from ACS census tract-level
# racial composition ("B03002_001", "B03002_003", "B03002_004","B03002_006","B03002_016", "B03002_007", 
#"B03002_015", "B03002_012")

df <-df %>%
  mutate(
    majority_status = ifelse(perc_white > 50, "white", 0),
    majority_status = ifelse(perc_black > 50, "black", majority_status),
    majority_status = ifelse(perc_asian > 50, "asian", majority_status),
    majority_status = ifelse(perc_hispanic > 50, "hisp", majority_status),
    majority_status = ifelse(perc_white <= 50 & perc_black <= 50 & perc_asian <= 50 & perc_hispanic <= 50, "no majority", majority_status)
  )


